import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Load the dataset; the 'diagnosis' column is the target used by the plots
# and models further down.
df = pd.read_csv("wbcd.csv")
df.head()
# Correlate numeric columns only. Since pandas 2.0, DataFrame.corr() no longer
# drops non-numeric columns silently and raises if a string-valued column
# (presumably 'diagnosis' -- confirm against the CSV) is present.
df.select_dtypes(include="number").corr()
df.info()
df.shape
df.describe()
round(df.describe(), 2)  # summary statistics rounded to 2 decimal places
# Pairwise plots of the columns, coloured by diagnosis class.
sns.pairplot(df, hue='diagnosis', palette='Set1')
df['diagnosis'].value_counts()  # number of rows per target class
sns.countplot(data=df, x='diagnosis')
from sklearn.preprocessing import StandardScaler

# Feature matrix: every column except the target label.
features = df.drop('diagnosis', axis=1)

# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed later in this file, so test rows influence the scaling
# statistics. Consider fitting on the training rows only.
scaler = StandardScaler()
scaler.fit(features)
scaled_features = scaler.transform(features)

# Label the scaled array with the columns that were actually scaled.
# df.columns[:-1] is only correct when 'diagnosis' is the last column of the
# CSV; in any other position it mislabels every column after it.
df_feat = pd.DataFrame(scaled_features, columns=features.columns)
df_feat.head()
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Model inputs: the scaled feature array and the diagnosis labels.
X, y = scaled_features, df['diagnosis']

# Hold out 33% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
# Classifier plus the metrics used to evaluate it.
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix

# Instantiate an SVC with its default settings and train it on the
# training split.
svc = SVC()
svc.fit(X_train, y_train)

# Predict the held-out rows and report how the predictions compare with
# the true labels.
predictions = svc.predict(X_test)
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

# Overall accuracy on the test split, shown to four decimal places.
default_accuracy = accuracy_score(y_test, predictions)
print('Model Accuracy Score: {0:0.4f}'.format(default_accuracy))
# SVM classification with a linear kernel
# Fit one SVC per kernel on the same training split and evaluate each on the
# test split. Every section ends with the fraction of test rows predicted
# correctly. SVC.fit returns the fitted estimator, so construction and
# fitting are chained.

# Linear kernel
model_linear = SVC(kernel="linear").fit(X_train, y_train)
pred_test_linear = model_linear.predict(X_test)
np.mean(pred_test_linear == y_test)

# Polynomial kernel
model_poly = SVC(kernel="poly").fit(X_train, y_train)
pred_test_poly = model_poly.predict(X_test)
np.mean(pred_test_poly == y_test)

# RBF kernel
model_rbf = SVC(kernel="rbf").fit(X_train, y_train)
pred_test_rbf = model_rbf.predict(X_test)
np.mean(pred_test_rbf == y_test)

# Sigmoid kernel
model_sig = SVC(kernel="sigmoid").fit(X_train, y_train)
pred_test_sig = model_sig.predict(X_test)
np.mean(pred_test_sig == y_test)